{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Use the %pip magic (not !pip) so the packages are installed into the environment\n",
    "# of the running kernel rather than whichever pip happens to be first on the shell PATH\n",
    "%pip install librosa matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import librosa\n",
    "import librosa.display\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# Path to the audio file (a relative path, resolved against the kernel's working directory)\n",
    "audio_file = r'Test_diar_1m.wav'\n",
    "\n",
    "# Read audio: sr=None keeps the file's native sampling rate, mono=True downmixes to one channel\n",
    "samples, sample_rate = librosa.load(audio_file, sr=None, mono=True)\n",
    "\n",
    "# Create timeline: sample i is taken at i / sample_rate seconds.\n",
    "# (np.linspace(0, duration, n) would include the endpoint and stretch the axis by\n",
    "# n / (n - 1), which disagrees with index = int(t * sample_rate) used for zooming.)\n",
    "duration = len(samples) / sample_rate\n",
    "time = np.arange(len(samples)) / sample_rate\n",
    "\n",
    "# Visualize the complete waveform as a line plot\n",
    "fig, ax = plt.subplots(figsize=(10, 4))\n",
    "ax.plot(time, samples)\n",
    "ax.axhline(0, color='black', linestyle='-')  # Horizontal reference line at Y = 0\n",
    "ax.set_xlabel('Time (s)')\n",
    "ax.set_ylabel('Amplitude')\n",
    "ax.set_title('Waveform of an audio file')\n",
    "\n",
    "# One tick every 5 seconds\n",
    "ax.set_xticks(np.arange(0, duration, 5))\n",
    "\n",
    "# Mark the 25 ms window (5 s to 5.025 s) that is examined in the zoomed views\n",
    "for marker in (5, 5.025):\n",
    "    ax.axvline(x=marker, color='red', linestyle='dashed')\n",
    "\n",
    "plt.show()\n",
    "\n",
    "def plot_waveform_zoom(samples, sample_rate, zoom_start, zoom_end,\n",
    "                       show_line=True, show_points=False,\n",
    "                       point_size=3, point_color=None, tick_step=None):\n",
    "    \"\"\"Plot the part of the waveform between zoom_start and zoom_end (in seconds).\n",
    "\n",
    "    samples      -- 1-D array of audio samples\n",
    "    sample_rate  -- sampling rate in Hz, used to convert seconds to sample indices\n",
    "    zoom_start   -- start of the window in seconds\n",
    "    zoom_end     -- end of the window in seconds (the sample at zoom_end is excluded)\n",
    "    show_line    -- draw the samples as a connected line\n",
    "    show_points  -- draw every sample as a dot\n",
    "    point_size   -- marker size passed to scatter\n",
    "    point_color  -- marker colour; None keeps matplotlib's default\n",
    "    tick_step    -- spacing of the x ticks in seconds; None keeps the automatic ticks\n",
    "    \"\"\"\n",
    "    # Clamp the window to the recording so the time axis and the sample slice\n",
    "    # always have the same length, even if the window runs past the end of the file\n",
    "    n_samples = len(samples)\n",
    "    start_idx = min(max(int(zoom_start * sample_rate), 0), n_samples)\n",
    "    end_idx = min(max(int(zoom_end * sample_rate), start_idx), n_samples)\n",
    "\n",
    "    # Sample i is taken at i / sample_rate seconds\n",
    "    zoom_time = np.arange(start_idx, end_idx) / sample_rate\n",
    "    zoom_samples = samples[start_idx:end_idx]\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(10, 4))\n",
    "    # The labels below only become visible if a legend is added to the axes\n",
    "    if show_line:\n",
    "        ax.plot(zoom_time, zoom_samples, label='Linie')\n",
    "    if show_points:\n",
    "        scatter_kwargs = {'s': point_size, 'label': 'Points'}\n",
    "        if point_color is not None:\n",
    "            scatter_kwargs['color'] = point_color\n",
    "        ax.scatter(zoom_time, zoom_samples, **scatter_kwargs)\n",
    "    ax.axhline(0, color='black', linestyle='-')  # Horizontal reference line at Y = 0\n",
    "\n",
    "    if tick_step is not None:\n",
    "        # Build the ticks from an integer count: np.arange with a float step can gain\n",
    "        # or lose its last element through rounding. The small epsilon makes the last\n",
    "        # tick land on zoom_end when the step divides the window evenly.\n",
    "        n_steps = int(np.floor((zoom_end - zoom_start) / tick_step + 1e-9))\n",
    "        ax.set_xticks(zoom_start + tick_step * np.arange(n_steps + 1))\n",
    "\n",
    "    ax.set_xlabel('Time (s)')\n",
    "    ax.set_ylabel('Amplitude')\n",
    "    ax.set_title('Zoomed in time range of waveform')\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "# Set time range for zoom: the 25 ms window marked by the red dashed lines above\n",
    "zoom_start = 5  # Zoom start time (in seconds)\n",
    "zoom_end = 5.025  # End time of the zoom (in seconds)\n",
    "\n",
    "# Visualize waveform as dot plot\n",
    "plot_waveform_zoom(samples, sample_rate, zoom_start, zoom_end,\n",
    "                   show_line=False, show_points=True, point_size=3)\n",
    "\n",
    "# Visualize waveform as line and dot plot\n",
    "plot_waveform_zoom(samples, sample_rate, zoom_start, zoom_end,\n",
    "                   show_points=True, point_size=5, point_color='black')\n",
    "\n",
    "# Visualize waveform as line plot over a wider 75 ms window with a tick every 5 ms\n",
    "zoom_end = 5.075  # End time of the zoom (in seconds)\n",
    "plot_waveform_zoom(samples, sample_rate, zoom_start, zoom_end, tick_step=0.005)\n",
    "\n",
    "# Calculate Mel spectrogram (power) with librosa's default analysis settings\n",
    "mel_spec = librosa.feature.melspectrogram(y=samples, sr=sample_rate)\n",
    "\n",
    "# Apply log scale: convert power to dB, using the maximum of the spectrogram as reference\n",
    "mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)\n",
    "\n",
    "# Plot Mel spectrogram. specshow derives the time and mel-frequency axes itself from\n",
    "# sr, fmin and fmax, so no hand-built axis arrays are needed (and the waveform\n",
    "# timeline stored in `time` is no longer overwritten here).\n",
    "fig, ax = plt.subplots(figsize=(10, 4))\n",
    "img = librosa.display.specshow(mel_spec_db, x_axis='time', y_axis='mel', sr=sample_rate, fmin=0, fmax=sample_rate/2, ax=ax)\n",
    "fig.colorbar(img, ax=ax, format='%+2.0f dB')  # pass the mesh explicitly instead of relying on pyplot's current image\n",
    "ax.set_title('Mel Spectrogram')\n",
    "ax.set_xlabel('Time (s)')\n",
    "ax.set_ylabel('Frequency (Hz)')\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
